import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
sns.set_style("darkgrid")
df=pd.read_csv('C:/Users/Ambuj nayan mishra/Downloads/hotel_booking.csv')
df.head()
| hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | ... | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | name | phone-number | credit_card | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Resort Hotel | 0 | 342 | 2015 | July | 27 | 1 | 0 | 0 | 2 | ... | Transient | 0.0 | 0 | 0 | Check-Out | 2015-07-01 | Ernest Barnes | Ernest.Barnes31@outlook.com | 669-792-1661 | ************4322 |
| 1 | Resort Hotel | 0 | 737 | 2015 | July | 27 | 1 | 0 | 0 | 2 | ... | Transient | 0.0 | 0 | 0 | Check-Out | 2015-07-01 | Andrea Baker | Andrea_Baker94@aol.com | 858-637-6955 | ************9157 |
| 2 | Resort Hotel | 0 | 7 | 2015 | July | 27 | 1 | 0 | 1 | 1 | ... | Transient | 75.0 | 0 | 0 | Check-Out | 2015-07-02 | Rebecca Parker | Rebecca_Parker@comcast.net | 652-885-2745 | ************3734 |
| 3 | Resort Hotel | 0 | 13 | 2015 | July | 27 | 1 | 0 | 1 | 1 | ... | Transient | 75.0 | 0 | 0 | Check-Out | 2015-07-02 | Laura Murray | Laura_M@gmail.com | 364-656-8427 | ************5677 |
| 4 | Resort Hotel | 0 | 14 | 2015 | July | 27 | 1 | 0 | 2 | 2 | ... | Transient | 98.0 | 0 | 1 | Check-Out | 2015-07-03 | Linda Hines | LHines@verizon.com | 713-226-5883 | ************5498 |
5 rows × 36 columns
df.tail()
| hotel | is_canceled | lead_time | arrival_date_year | arrival_date_month | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | ... | customer_type | adr | required_car_parking_spaces | total_of_special_requests | reservation_status | reservation_status_date | name | phone-number | credit_card | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 119385 | City Hotel | 0 | 23 | 2017 | August | 35 | 30 | 2 | 5 | 2 | ... | Transient | 96.14 | 0 | 0 | Check-Out | 2017-09-06 | Claudia Johnson | Claudia.J@yahoo.com | 403-092-5582 | ************8647 |
| 119386 | City Hotel | 0 | 102 | 2017 | August | 35 | 31 | 2 | 5 | 3 | ... | Transient | 225.43 | 0 | 2 | Check-Out | 2017-09-07 | Wesley Aguilar | WAguilar@xfinity.com | 238-763-0612 | ************4333 |
| 119387 | City Hotel | 0 | 34 | 2017 | August | 35 | 31 | 2 | 5 | 2 | ... | Transient | 157.71 | 0 | 4 | Check-Out | 2017-09-07 | Mary Morales | Mary_Morales@hotmail.com | 395-518-4100 | ************1821 |
| 119388 | City Hotel | 0 | 109 | 2017 | August | 35 | 31 | 2 | 5 | 2 | ... | Transient | 104.40 | 0 | 0 | Check-Out | 2017-09-07 | Caroline Conley MD | MD_Caroline@comcast.net | 531-528-1017 | ************7860 |
| 119389 | City Hotel | 0 | 205 | 2017 | August | 35 | 29 | 2 | 7 | 2 | ... | Transient | 151.20 | 0 | 2 | Check-Out | 2017-09-07 | Ariana Michael | Ariana_M@xfinity.com | 422-804-6403 | ************4482 |
5 rows × 36 columns
This dataset contains 119390 observations for a City Hotel and a Resort Hotel. Each observation represents a hotel booking between the 1st of July 2015 and 31st of August 2017, including booking that effectively arrived and booking that were canceled.
Since this is hotel real data, all data elements pertaining hotel or costumer identification were deleted. Four Columns, 'name', 'email', 'phone number' and 'credit_card' have been artificially created and added to the dataset.
The data is originally from the article Hotel Booking Demand Datasets, written by Nuno Antonio, Ana Almeida, and Luis Nunes for Data in Brief, Volume 22, February 2019.
| No | Columns name | Meaning |
|---|---|---|
| 1 | hotel | The datasets contains the booking information of two hotel. One of the hotels is a resort hotel and the other is a city |
| 2 | is_canceled | Value indicating if the booking was canceled (1) or not (0). |
| 3 | country | Country of origin. |
| 4 | market_segment | Market segment designation. In categories, the term “TA” means “Travel Agents” and “TO” means “Tour Operators”. |
| 5 | adr | Average Daily Rate (Calculated by dividing the sum of all lodging transactions by the total number of staying nights). |
r,c=df.shape
print(f'number of rows:{r}')
print(f'number of columns:{c}')
number of rows:119390 number of columns:36
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 119390 entries, 0 to 119389 Data columns (total 36 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 hotel 119390 non-null object 1 is_canceled 119390 non-null int64 2 lead_time 119390 non-null int64 3 arrival_date_year 119390 non-null int64 4 arrival_date_month 119390 non-null object 5 arrival_date_week_number 119390 non-null int64 6 arrival_date_day_of_month 119390 non-null int64 7 stays_in_weekend_nights 119390 non-null int64 8 stays_in_week_nights 119390 non-null int64 9 adults 119390 non-null int64 10 children 119386 non-null float64 11 babies 119390 non-null int64 12 meal 119390 non-null object 13 country 118902 non-null object 14 market_segment 119390 non-null object 15 distribution_channel 119390 non-null object 16 is_repeated_guest 119390 non-null int64 17 previous_cancellations 119390 non-null int64 18 previous_bookings_not_canceled 119390 non-null int64 19 reserved_room_type 119390 non-null object 20 assigned_room_type 119390 non-null object 21 booking_changes 119390 non-null int64 22 deposit_type 119390 non-null object 23 agent 103050 non-null float64 24 company 6797 non-null float64 25 days_in_waiting_list 119390 non-null int64 26 customer_type 119390 non-null object 27 adr 119390 non-null float64 28 required_car_parking_spaces 119390 non-null int64 29 total_of_special_requests 119390 non-null int64 30 reservation_status 119390 non-null object 31 reservation_status_date 119390 non-null object 32 name 119390 non-null object 33 email 119390 non-null object 34 phone-number 119390 non-null object 35 credit_card 119390 non-null object dtypes: float64(4), int64(16), object(16) memory usage: 32.8+ MB
df.describe()
| is_canceled | lead_time | arrival_date_year | arrival_date_week_number | arrival_date_day_of_month | stays_in_weekend_nights | stays_in_week_nights | adults | children | babies | is_repeated_guest | previous_cancellations | previous_bookings_not_canceled | booking_changes | agent | company | days_in_waiting_list | adr | required_car_parking_spaces | total_of_special_requests | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 119390.000000 | 119390.000000 | 119390.000000 | 119390.000000 | 119390.000000 | 119390.000000 | 119390.000000 | 119390.000000 | 119386.000000 | 119390.000000 | 119390.000000 | 119390.000000 | 119390.000000 | 119390.000000 | 103050.000000 | 6797.000000 | 119390.000000 | 119390.000000 | 119390.000000 | 119390.000000 |
| mean | 0.370416 | 104.011416 | 2016.156554 | 27.165173 | 15.798241 | 0.927599 | 2.500302 | 1.856403 | 0.103890 | 0.007949 | 0.031912 | 0.087118 | 0.137097 | 0.221124 | 86.693382 | 189.266735 | 2.321149 | 101.831122 | 0.062518 | 0.571363 |
| std | 0.482918 | 106.863097 | 0.707476 | 13.605138 | 8.780829 | 0.998613 | 1.908286 | 0.579261 | 0.398561 | 0.097436 | 0.175767 | 0.844336 | 1.497437 | 0.652306 | 110.774548 | 131.655015 | 17.594721 | 50.535790 | 0.245291 | 0.792798 |
| min | 0.000000 | 0.000000 | 2015.000000 | 1.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 6.000000 | 0.000000 | -6.380000 | 0.000000 | 0.000000 |
| 25% | 0.000000 | 18.000000 | 2016.000000 | 16.000000 | 8.000000 | 0.000000 | 1.000000 | 2.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 9.000000 | 62.000000 | 0.000000 | 69.290000 | 0.000000 | 0.000000 |
| 50% | 0.000000 | 69.000000 | 2016.000000 | 28.000000 | 16.000000 | 1.000000 | 2.000000 | 2.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 14.000000 | 179.000000 | 0.000000 | 94.575000 | 0.000000 | 0.000000 |
| 75% | 1.000000 | 160.000000 | 2017.000000 | 38.000000 | 23.000000 | 2.000000 | 3.000000 | 2.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 229.000000 | 270.000000 | 0.000000 | 126.000000 | 0.000000 | 1.000000 |
| max | 1.000000 | 737.000000 | 2017.000000 | 53.000000 | 31.000000 | 19.000000 | 50.000000 | 55.000000 | 10.000000 | 10.000000 | 1.000000 | 26.000000 | 72.000000 | 21.000000 | 535.000000 | 543.000000 | 391.000000 | 5400.000000 | 8.000000 | 5.000000 |
df.describe(include='object')
| hotel | arrival_date_month | meal | country | market_segment | distribution_channel | reserved_room_type | assigned_room_type | deposit_type | customer_type | reservation_status | reservation_status_date | name | phone-number | credit_card | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 119390 | 119390 | 119390 | 118902 | 119390 | 119390 | 119390 | 119390 | 119390 | 119390 | 119390 | 119390 | 119390 | 119390 | 119390 | 119390 |
| unique | 2 | 12 | 5 | 177 | 8 | 5 | 10 | 12 | 3 | 4 | 3 | 926 | 81503 | 115889 | 119390 | 9000 |
| top | City Hotel | August | BB | PRT | Online TA | TA/TO | A | A | No Deposit | Transient | Check-Out | 2015-10-21 | Michael Johnson | Michael.C@gmail.com | 669-792-1661 | ************4923 |
| freq | 79330 | 13877 | 92310 | 48590 | 56477 | 97870 | 85994 | 74053 | 104641 | 89613 | 75166 | 1461 | 48 | 6 | 1 | 28 |
for i in df.describe(include='object').columns:
print(i)
print(df[i].unique())
print('-------------------------------------------------')
hotel ['Resort Hotel' 'City Hotel'] ------------------------------------------------- arrival_date_month ['July' 'August' 'September' 'October' 'November' 'December' 'January' 'February' 'March' 'April' 'May' 'June'] ------------------------------------------------- meal ['BB' 'FB' 'HB' 'SC' 'Undefined'] ------------------------------------------------- country ['PRT' 'GBR' 'USA' 'ESP' 'IRL' 'FRA' nan 'ROU' 'NOR' 'OMN' 'ARG' 'POL' 'DEU' 'BEL' 'CHE' 'CN' 'GRC' 'ITA' 'NLD' 'DNK' 'RUS' 'SWE' 'AUS' 'EST' 'CZE' 'BRA' 'FIN' 'MOZ' 'BWA' 'LUX' 'SVN' 'ALB' 'IND' 'CHN' 'MEX' 'MAR' 'UKR' 'SMR' 'LVA' 'PRI' 'SRB' 'CHL' 'AUT' 'BLR' 'LTU' 'TUR' 'ZAF' 'AGO' 'ISR' 'CYM' 'ZMB' 'CPV' 'ZWE' 'DZA' 'KOR' 'CRI' 'HUN' 'ARE' 'TUN' 'JAM' 'HRV' 'HKG' 'IRN' 'GEO' 'AND' 'GIB' 'URY' 'JEY' 'CAF' 'CYP' 'COL' 'GGY' 'KWT' 'NGA' 'MDV' 'VEN' 'SVK' 'FJI' 'KAZ' 'PAK' 'IDN' 'LBN' 'PHL' 'SEN' 'SYC' 'AZE' 'BHR' 'NZL' 'THA' 'DOM' 'MKD' 'MYS' 'ARM' 'JPN' 'LKA' 'CUB' 'CMR' 'BIH' 'MUS' 'COM' 'SUR' 'UGA' 'BGR' 'CIV' 'JOR' 'SYR' 'SGP' 'BDI' 'SAU' 'VNM' 'PLW' 'QAT' 'EGY' 'PER' 'MLT' 'MWI' 'ECU' 'MDG' 'ISL' 'UZB' 'NPL' 'BHS' 'MAC' 'TGO' 'TWN' 'DJI' 'STP' 'KNA' 'ETH' 'IRQ' 'HND' 'RWA' 'KHM' 'MCO' 'BGD' 'IMN' 'TJK' 'NIC' 'BEN' 'VGB' 'TZA' 'GAB' 'GHA' 'TMP' 'GLP' 'KEN' 'LIE' 'GNB' 'MNE' 'UMI' 'MYT' 'FRO' 'MMR' 'PAN' 'BFA' 'LBY' 'MLI' 'NAM' 'BOL' 'PRY' 'BRB' 'ABW' 'AIA' 'SLV' 'DMA' 'PYF' 'GUY' 'LCA' 'ATA' 'GTM' 'ASM' 'MRT' 'NCL' 'KIR' 'SDN' 'ATF' 'SLE' 'LAO'] ------------------------------------------------- market_segment ['Direct' 'Corporate' 'Online TA' 'Offline TA/TO' 'Complementary' 'Groups' 'Undefined' 'Aviation'] ------------------------------------------------- distribution_channel ['Direct' 'Corporate' 'TA/TO' 'Undefined' 'GDS'] ------------------------------------------------- reserved_room_type ['C' 'A' 'D' 'E' 'G' 'F' 'H' 'L' 'P' 'B'] ------------------------------------------------- assigned_room_type ['C' 'A' 'D' 'E' 'G' 'F' 'I' 'B' 'H' 'P' 'L' 'K'] ------------------------------------------------- deposit_type ['No Deposit' 'Refundable' 'Non Refund'] ------------------------------------------------- customer_type ['Transient' 'Contract' 'Transient-Party' 'Group'] ------------------------------------------------- reservation_status ['Check-Out' 'Canceled' 'No-Show'] ------------------------------------------------- reservation_status_date ['2015-07-01' '2015-07-02' '2015-07-03' '2015-05-06' '2015-04-22' '2015-06-23' '2015-07-05' '2015-07-06' '2015-07-07' '2015-07-08' '2015-05-11' '2015-07-15' '2015-07-16' '2015-05-29' '2015-05-19' '2015-06-19' '2015-05-23' '2015-05-18' '2015-07-09' '2015-06-02' '2015-07-13' '2015-07-04' '2015-06-29' '2015-06-16' '2015-06-18' '2015-06-12' '2015-06-09' '2015-05-26' '2015-07-11' '2015-07-12' '2015-07-17' '2015-04-15' '2015-05-13' '2015-07-10' '2015-05-20' '2015-05-12' '2015-07-14' '2015-06-17' '2015-05-01' '2015-03-30' '2015-07-19' '2015-06-03' '2015-06-26' '2015-05-14' '2015-07-20' '2015-05-07' '2015-05-28' '2015-04-13' '2015-03-25' '2015-07-21' '2015-06-27' '2015-07-18' '2015-07-23' '2015-06-08' '2015-06-22' '2015-06-24' '2015-03-05' '2015-06-01' '2015-04-24' '2015-07-22' '2015-05-27' '2015-04-06' '2015-04-11' '2015-07-25' '2015-07-28' '2015-07-29' '2015-06-25' '2015-07-24' '2015-06-05' '2015-06-30' '2015-06-13' '2015-06-11' '2015-07-30' '2015-07-27' '2015-04-29' '2015-06-04' '2015-07-26' '2015-08-01' '2015-08-02' '2015-06-15' '2015-04-23' '2015-07-31' '2015-05-25' '2015-08-03' '2015-04-17' '2015-08-04' '2015-08-06' '2015-05-15' '2015-05-09' '2015-03-17' '2015-05-22' '2015-08-07' '2015-04-04' '2015-08-05' '2015-08-08' '2015-08-10' '2015-05-04' '2015-06-06' '2015-08-09' '2015-08-15' '2015-08-11' '2015-03-28' '2015-08-14' '2015-08-12' '2015-08-16' '2015-05-16' '2015-08-21' '2015-08-13' '2015-08-17' '2015-04-20' '2015-08-18' '2015-08-23' '2015-08-22' '2015-08-19' '2015-08-20' '2015-08-29' '2015-03-31' '2015-05-30' '2015-08-25' '2015-04-14' '2015-08-24' '2015-03-24' '2015-05-21' '2015-08-28' '2015-08-26' '2015-08-27' '2015-08-30' '2015-08-31' '2015-09-06' '2015-09-03' '2015-09-04' '2015-09-02' '2015-09-01' '2015-09-05' '2015-06-20' '2015-09-07' '2015-09-10' '2015-09-11' '2015-09-08' '2015-09-09' '2015-09-13' '2015-09-15' '2015-04-10' '2015-01-02' '2014-11-18' '2015-09-12' '2015-09-17' '2015-09-14' '2015-04-07' '2015-09-19' '2015-09-16' '2015-09-20' '2015-01-18' '2015-10-23' '2015-01-22' '2015-01-01' '2015-09-22' '2015-09-24' '2015-09-18' '2015-09-21' '2015-09-30' '2015-09-25' '2015-09-27' '2015-09-28' '2015-10-12' '2015-09-29' '2015-09-23' '2015-10-01' '2015-09-26' '2015-04-18' '2015-10-02' '2015-10-04' '2015-10-08' '2015-10-03' '2015-10-07' '2015-10-09' '2015-10-11' '2015-10-05' '2015-10-06' '2015-10-10' '2015-10-14' '2015-10-15' '2015-10-18' '2015-10-13' '2015-10-20' '2015-10-19' '2015-10-31' '2015-10-16' '2015-10-21' '2015-10-22' '2015-10-17' '2015-10-24' '2015-10-25' '2015-10-28' '2015-10-27' '2015-10-26' '2015-10-30' '2015-11-05' '2015-10-29' '2015-11-03' '2015-11-07' '2015-11-04' '2015-11-01' '2015-11-02' '2015-11-17' '2015-11-06' '2015-11-10' '2015-11-08' '2015-11-09' '2015-11-15' '2015-11-16' '2015-11-11' '2015-11-12' '2015-11-14' '2015-11-13' '2015-11-18' '2015-11-22' '2015-11-19' '2015-11-21' '2015-11-20' '2015-11-24' '2015-11-25' '2015-11-23' '2015-11-28' '2015-11-26' '2015-11-27' '2015-11-29' '2015-12-04' '2015-12-01' '2015-12-06' '2015-12-08' '2015-12-02' '2015-12-03' '2015-12-31' '2015-12-05' '2015-12-10' '2015-12-17' '2015-11-30' '2015-12-12' '2015-12-07' '2016-01-05' '2015-12-11' '2015-12-13' '2015-12-15' '2015-12-16' '2015-12-19' '2015-12-18' '2015-12-26' '2015-12-27' '2015-12-22' '2015-12-23' '2015-12-24' '2015-12-29' '2015-12-28' '2015-12-20' '2015-12-30' '2016-01-02' '2016-01-01' '2015-12-25' '2016-01-03' '2016-01-04' '2016-01-11' '2016-01-07' '2015-12-21' '2016-01-09' '2016-01-10' '2016-01-08' '2016-01-06' '2016-01-12' '2016-01-13' '2016-01-23' '2016-02-09' '2016-01-15' '2016-01-16' '2016-01-17' '2016-01-19' '2016-01-18' '2016-01-21' '2016-01-24' '2016-01-22' '2016-01-29' '2016-01-27' '2016-01-25' '2016-03-08' '2016-01-26' '2016-01-20' '2016-01-30' '2016-02-01' '2016-02-02' '2016-02-08' '2016-02-07' '2016-01-28' '2016-02-05' '2016-02-03' '2016-02-13' '2016-02-10' '2016-02-04' '2016-02-12' '2016-02-11' '2016-02-16' '2016-02-14' '2016-02-15' '2016-02-20' '2016-02-06' '2016-01-14' '2016-02-17' '2016-02-21' '2016-02-24' '2016-02-25' '2016-02-19' '2016-02-18' '2016-02-26' '2016-02-23' '2016-03-05' '2016-02-22' '2016-02-27' '2016-03-03' '2016-03-24' '2016-03-04' '2016-02-29' '2016-03-01' '2016-03-02' '2016-03-30' '2016-03-07' '2016-03-14' '2016-03-21' '2016-03-09' '2016-03-12' '2016-03-22' '2016-03-10' '2016-03-11' '2016-03-20' '2016-03-15' '2016-03-17' '2016-03-16' '2016-03-19' '2016-03-27' '2016-03-18' '2016-03-26' '2016-03-31' '2016-03-28' '2016-03-29' '2016-04-01' '2016-03-23' '2016-04-02' '2016-03-25' '2016-03-13' '2016-04-04' '2016-04-03' '2016-04-05' '2016-04-08' '2016-04-06' '2016-04-09' '2016-04-12' '2016-04-16' '2016-04-17' '2016-04-27' '2016-04-14' '2016-04-18' '2016-04-21' '2016-04-19' '2016-04-20' '2016-04-10' '2016-04-13' '2016-04-11' '2016-04-07' '2016-04-15' '2016-04-22' '2016-04-23' '2016-04-26' '2016-04-28' '2016-04-24' '2016-04-25' '2016-04-29' '2016-04-30' '2016-05-01' '2016-05-10' '2016-05-02' '2016-05-07' '2016-05-08' '2016-05-12' '2016-05-04' '2016-05-06' '2016-05-03' '2016-05-09' '2016-05-05' '2016-05-13' '2016-05-14' '2016-05-18' '2016-05-19' '2016-05-15' '2016-05-16' '2016-05-11' '2016-05-21' '2016-05-22' '2016-05-20' '2016-05-24' '2016-05-25' '2016-05-26' '2016-05-23' '2016-05-27' '2016-05-17' '2016-05-29' '2016-05-28' '2016-05-30' '2016-05-31' '2016-06-01' '2016-06-03' '2016-06-08' '2016-06-02' '2016-06-05' '2016-06-06' '2016-06-13' '2016-06-07' '2016-06-10' '2016-06-11' '2016-06-16' '2016-06-12' '2016-06-14' '2016-06-17' '2016-06-04' '2016-06-18' '2016-06-21' '2016-06-09' '2016-06-24' '2016-06-20' '2016-06-25' '2016-06-22' '2016-06-26' '2016-06-23' '2016-07-01' '2016-06-15' '2016-06-28' '2016-07-02' '2016-06-19' '2016-06-27' '2016-07-04' '2016-06-30' '2016-07-05' '2016-07-08' '2016-07-09' '2016-07-07' '2016-07-12' '2016-06-29' '2016-07-10' '2016-07-15' '2016-07-03' '2016-07-16' '2016-07-14' '2016-07-18' '2016-07-13' '2016-07-06' '2016-07-20' '2016-07-21' '2016-07-23' '2016-07-19' '2016-07-11' '2016-07-28' '2016-07-17' '2016-07-25' '2016-07-22' '2016-07-29' '2016-08-03' '2016-08-02' '2016-08-04' '2016-08-08' '2016-08-10' '2016-08-01' '2016-08-06' '2016-03-06' '2016-08-05' '2016-07-26' '2016-08-07' '2016-07-30' '2016-07-24' '2016-08-12' '2016-07-27' '2016-08-13' '2016-08-18' '2016-08-16' '2016-08-15' '2016-08-17' '2016-08-11' '2016-07-31' '2016-08-19' '2016-09-01' '2016-08-23' '2016-08-26' '2016-08-20' '2016-08-21' '2016-09-04' '2016-08-22' '2016-08-27' '2016-08-25' '2016-08-09' '2016-09-05' '2016-08-24' '2016-09-10' '2016-08-29' '2016-09-09' '2016-08-30' '2016-09-13' '2016-08-31' '2016-09-14' '2016-09-12' '2016-09-15' '2016-08-14' '2016-09-02' '2016-09-08' '2016-09-19' '2016-09-16' '2016-09-07' '2016-09-21' '2016-09-06' '2016-09-22' '2016-09-17' '2016-09-20' '2016-09-03' '2016-09-26' '2016-09-23' '2016-09-18' '2016-09-29' '2016-10-02' '2016-10-01' '2016-09-27' '2016-09-25' '2016-10-05' '2016-09-11' '2016-09-30' '2016-10-09' '2016-10-03' '2016-10-06' '2016-10-11' '2016-09-24' '2016-10-13' '2016-09-28' '2016-10-08' '2016-10-07' '2016-10-16' '2016-08-28' '2016-10-17' '2016-10-18' '2016-10-10' '2016-10-04' '2016-10-15' '2016-10-19' '2016-10-21' '2016-10-12' '2016-10-24' '2016-10-26' '2016-10-23' '2016-10-20' '2016-10-25' '2016-10-27' '2016-10-28' '2016-10-30' '2016-10-29' '2016-11-01' '2016-11-04' '2016-10-14' '2016-11-07' '2016-11-03' '2016-11-10' '2016-11-14' '2016-11-02' '2016-10-31' '2016-11-11' '2016-11-08' '2016-11-05' '2016-11-25' '2016-11-09' '2016-11-20' '2016-11-21' '2016-10-22' '2016-11-22' '2016-11-16' '2016-11-23' '2016-11-17' '2016-11-06' '2016-11-15' '2016-11-13' '2016-11-12' '2016-11-27' '2016-11-19' '2016-11-30' '2016-11-18' '2016-12-02' '2016-12-04' '2016-11-29' '2016-12-07' '2016-11-28' '2016-12-03' '2016-12-06' '2016-11-24' '2016-12-08' '2016-12-05' '2016-12-10' '2016-12-13' '2016-12-14' '2016-12-16' '2016-12-15' '2016-12-17' '2016-12-19' '2016-12-21' '2016-12-20' '2016-12-22' '2016-12-23' '2016-12-24' '2016-12-01' '2016-12-27' '2016-12-29' '2016-12-30' '2016-12-12' '2017-01-02' '2016-12-11' '2017-01-03' '2017-01-04' '2017-01-01' '2016-12-26' '2017-01-06' '2016-12-28' '2016-12-18' '2017-01-10' '2017-01-11' '2017-01-07' '2017-01-12' '2017-01-16' '2017-01-14' '2017-01-13' '2017-01-05' '2017-01-17' '2017-01-20' '2016-12-09' '2017-01-26' '2016-12-31' '2017-01-23' '2017-01-27' '2017-01-28' '2017-01-19' '2017-01-25' '2017-01-24' '2017-01-29' '2017-01-18' '2016-12-25' '2017-01-15' '2017-01-21' '2017-02-01' '2017-02-02' '2017-01-31' '2017-02-03' '2017-02-04' '2017-02-06' '2017-02-07' '2017-02-08' '2017-01-30' '2017-02-09' '2017-01-09' '2017-02-11' '2017-02-10' '2017-02-12' '2017-02-13' '2017-02-14' '2017-02-16' '2017-02-17' '2017-02-18' '2017-02-19' '2017-02-20' '2017-02-15' '2017-02-21' '2017-02-22' '2017-02-26' '2017-02-23' '2017-02-24' '2017-02-25' '2017-02-28' '2017-03-05' '2017-02-27' '2017-03-03' '2017-03-06' '2017-03-02' '2017-03-08' '2017-03-09' '2017-03-10' '2017-03-07' '2017-03-12' '2017-03-13' '2017-03-14' '2017-03-01' '2017-03-18' '2017-03-17' '2017-03-24' '2017-03-22' '2017-03-26' '2017-03-27' '2017-03-11' '2017-03-28' '2017-03-29' '2017-03-30' '2017-03-31' '2017-03-19' '2017-01-22' '2017-04-02' '2017-03-20' '2017-04-03' '2017-01-08' '2017-03-23' '2017-04-05' '2017-02-05' '2017-04-04' '2017-03-15' '2017-04-07' '2017-03-25' '2017-04-08' '2017-04-06' '2017-03-21' '2017-04-10' '2017-04-01' '2017-04-11' '2017-04-13' '2017-04-15' '2017-04-12' '2017-03-04' '2017-04-19' '2017-04-22' '2017-04-20' '2017-05-02' '2017-04-09' '2017-04-23' '2017-04-24' '2017-04-16' '2017-04-28' '2017-04-18' '2017-04-26' '2017-04-25' '2017-04-17' '2017-04-21' '2017-05-03' '2017-05-04' '2017-03-16' '2017-05-05' '2017-04-29' '2017-04-14' '2017-05-08' '2017-04-27' '2017-05-11' '2017-05-01' '2017-05-10' '2017-05-13' '2017-05-06' '2017-05-14' '2017-05-16' '2017-04-30' '2017-05-15' '2017-05-07' '2017-05-09' '2017-05-17' '2017-05-21' '2017-05-12' '2017-05-22' '2017-05-24' '2017-05-23' '2017-05-25' '2017-05-26' '2017-05-28' '2017-05-27' '2017-05-29' '2017-05-19' '2017-05-31' '2017-05-20' '2017-06-01' '2017-05-30' '2017-06-02' '2016-11-26' '2017-06-04' '2017-06-05' '2017-06-06' '2017-06-07' '2017-05-18' '2017-06-09' '2017-06-10' '2017-06-11' '2017-06-12' '2017-06-14' '2017-06-08' '2017-06-16' '2017-06-13' '2017-06-03' '2017-06-24' '2017-06-20' '2017-06-19' '2017-06-21' '2017-06-26' '2017-06-27' '2017-06-22' '2017-06-28' '2017-06-15' '2017-06-29' '2017-06-30' '2017-06-18' '2017-07-04' '2017-07-08' '2017-07-05' '2017-07-03' '2017-07-07' '2017-07-01' '2017-07-06' '2017-07-11' '2017-07-12' '2017-06-23' '2017-07-13' '2017-07-02' '2017-07-10' '2017-07-14' '2017-07-15' '2017-07-16' '2017-07-18' '2017-07-17' '2017-07-19' '2017-07-20' '2017-07-21' '2017-06-25' '2017-06-17' '2017-07-24' '2017-07-26' '2017-07-09' '2017-07-27' '2017-07-28' '2017-07-31' '2017-07-29' '2017-07-22' '2017-08-02' '2017-08-01' '2017-08-03' '2017-08-04' '2017-07-25' '2017-07-23' '2017-08-09' '2017-08-10' '2017-07-30' '2017-08-07' '2017-08-13' '2017-08-05' '2017-08-14' '2017-08-08' '2017-08-16' '2017-08-17' '2017-08-15' '2017-08-18' '2017-08-20' '2017-08-22' '2017-08-06' '2017-08-25' '2017-08-26' '2017-08-23' '2017-08-11' '2017-08-27' '2017-08-21' '2017-08-29' '2017-08-31' '2017-08-12' '2017-08-19' '2016-01-31' '2017-09-01' '2017-08-28' '2015-04-03' '2015-01-21' '2015-01-28' '2015-01-29' '2015-01-30' '2015-02-02' '2015-02-05' '2015-02-06' '2015-02-09' '2015-02-10' '2015-02-11' '2015-02-12' '2015-02-19' '2015-02-20' '2015-02-23' '2015-02-24' '2015-02-25' '2015-02-26' '2015-02-27' '2015-03-03' '2015-03-04' '2015-03-06' '2015-03-09' '2015-03-11' '2015-03-12' '2015-03-18' '2015-04-02' '2015-06-14' '2015-04-08' '2015-04-16' '2015-04-25' '2015-04-28' '2015-05-08' '2017-09-06' '2016-02-28' '2015-12-09' '2015-12-14' '2017-09-09' '2017-09-02' '2017-08-24' '2017-08-30' '2017-09-03' '2017-09-04' '2017-09-05' '2017-09-07' '2017-09-08' '2017-09-10' '2017-09-12' '2017-09-14' '2015-04-30' '2015-04-21' '2015-04-05' '2015-03-13' '2015-05-05' '2015-03-29' '2015-06-10' '2015-04-27' '2014-10-17' '2015-01-20' '2015-02-17' '2015-03-10' '2015-03-23'] ------------------------------------------------- name ['Ernest Barnes' 'Andrea Baker' 'Rebecca Parker' ... 'Wesley Aguilar' 'Caroline Conley MD' 'Ariana Michael'] ------------------------------------------------- email ['Ernest.Barnes31@outlook.com' 'Andrea_Baker94@aol.com' 'Rebecca_Parker@comcast.net' ... 'Mary_Morales@hotmail.com' 'MD_Caroline@comcast.net' 'Ariana_M@xfinity.com'] ------------------------------------------------- phone-number ['669-792-1661' '858-637-6955' '652-885-2745' ... '395-518-4100' '531-528-1017' '422-804-6403'] ------------------------------------------------- credit_card ['************4322' '************9157' '************3734' ... '************9170' '************6349' '************7959'] -------------------------------------------------
df.drop(['name','email','phone-number','credit_card'],axis=1,inplace=True)
df['reservation_status_date'] = pd.to_datetime(df['reservation_status_date'])
df.dtypes
hotel object is_canceled int64 lead_time int64 arrival_date_year int64 arrival_date_month object arrival_date_week_number int64 arrival_date_day_of_month int64 stays_in_weekend_nights int64 stays_in_week_nights int64 adults int64 children float64 babies int64 meal object country object market_segment object distribution_channel object is_repeated_guest int64 previous_cancellations int64 previous_bookings_not_canceled int64 reserved_room_type object assigned_room_type object booking_changes int64 deposit_type object agent float64 company float64 days_in_waiting_list int64 customer_type object adr float64 required_car_parking_spaces int64 total_of_special_requests int64 reservation_status object reservation_status_date datetime64[ns] dtype: object
df.isnull().sum()
hotel 0 is_canceled 0 lead_time 0 arrival_date_year 0 arrival_date_month 0 arrival_date_week_number 0 arrival_date_day_of_month 0 stays_in_weekend_nights 0 stays_in_week_nights 0 adults 0 children 4 babies 0 meal 0 country 488 market_segment 0 distribution_channel 0 is_repeated_guest 0 previous_cancellations 0 previous_bookings_not_canceled 0 reserved_room_type 0 assigned_room_type 0 booking_changes 0 deposit_type 0 agent 16340 company 112593 days_in_waiting_list 0 customer_type 0 adr 0 required_car_parking_spaces 0 total_of_special_requests 0 reservation_status 0 reservation_status_date 0 dtype: int64
df.drop(['agent','company'],axis=1,inplace=True)
df['country'].value_counts()
PRT 48590
GBR 12129
FRA 10415
ESP 8568
DEU 7287
...
DJI 1
BWA 1
HND 1
VGB 1
NAM 1
Name: country, Length: 177, dtype: int64
df['country'].fillna('PRT',inplace=True)
df['children'].fillna(df['children'].median,inplace=True)
df.isnull().sum()
hotel 0 is_canceled 0 lead_time 0 arrival_date_year 0 arrival_date_month 0 arrival_date_week_number 0 arrival_date_day_of_month 0 stays_in_weekend_nights 0 stays_in_week_nights 0 adults 0 children 0 babies 0 meal 0 country 0 market_segment 0 distribution_channel 0 is_repeated_guest 0 previous_cancellations 0 previous_bookings_not_canceled 0 reserved_room_type 0 assigned_room_type 0 booking_changes 0 deposit_type 0 days_in_waiting_list 0 customer_type 0 adr 0 required_car_parking_spaces 0 total_of_special_requests 0 reservation_status 0 reservation_status_date 0 dtype: int64
plt.figure(figsize=(15, 5))
sns.boxplot(x='adr',data=df,color='#ff1a1a')
<AxesSubplot:xlabel='adr'>
plt.figure(figsize=(12, 5))
sns.kdeplot(x="adr",data=df)
<AxesSubplot:xlabel='adr', ylabel='Density'>
df = df[df['adr']<5000]
plt.figure(figsize=(15, 5))
sns.boxplot(x='adr',data=df,color='#ff1a1a')
<AxesSubplot:xlabel='adr'>
df['is_canceled'].value_counts()
0 75166 1 44223 Name: is_canceled, dtype: int64
df['is_canceled'].replace([0,1],['Not_Cancelled','Cancelled'],inplace=True)
f,ax=plt.subplots(1,1,figsize=(15,8))
sns.countplot(x=df['is_canceled'],data=df,palette = 'bright',saturation=0.95)
ax.set_title('Reservation Status Count:',color='black',size=25)
for container in ax.containers:
ax.bar_label(container,color='black',size=20)
f,ax=plt.subplots(1,1,figsize=(15,8))
sns.countplot(x=df['hotel'],data=df, hue='is_canceled',palette = 'flare',saturation=0.95)
ax.set_title('Reservation Status in different hotels',color='black',size=25)
plt.legend(['Not Cancelled','Cancelled'],fontsize=20)
plt.xlabel('Hotel',color='black',size=20)
plt.ylabel('Number of reservation',color='black',size=20)
for container in ax.containers:
ax.bar_label(container,color='black',size=20)
plt.show()
Resort_Hotel = df[df['hotel'] == 'Resort Hotel']
Resort_Hotel['is_canceled'].value_counts(normalize=True)
Not_Cancelled 0.722366 Cancelled 0.277634 Name: is_canceled, dtype: float64
City_Hotel = df[df['hotel'] == 'City Hotel']
City_Hotel['is_canceled'].value_counts(normalize=True)
Not_Cancelled 0.582738 Cancelled 0.417262 Name: is_canceled, dtype: float64
Resort_Hotel = Resort_Hotel.groupby('reservation_status_date')[['adr']].mean()
City_Hotel = City_Hotel.groupby('reservation_status_date')[['adr']].mean()
f,ax=plt.subplots(1,1,figsize=(20,8))
sns.lineplot( x=Resort_Hotel.index, y=Resort_Hotel['adr'])
sns.lineplot( x=City_Hotel.index, y=City_Hotel['adr'])
ax.set_title('Average daily rate in City and Resort Hotel',color='black',size=25)
plt.xlabel('Reservation date',color='black',size=20)
plt.ylabel('ADR',color='black',size=20)
plt.legend(['Resort_Hotel','City_Hotel'],fontsize=20)
<matplotlib.legend.Legend at 0x254cdb5f280>
df['month'] = df['reservation_status_date'].dt.month_name()
f,ax=plt.subplots(1,1,figsize=(15,8))
sns.countplot(x=df['month'],data=df, hue='is_canceled',palette = 'Blues',saturation=0.95)
ax.set_title('Reservation Status per month',color='black',size=25)
plt.legend(['Not Cancelled','Cancelled'],fontsize=15)
plt.xlabel('Months',color='black',size=20)
plt.ylabel('Number of reservation',color='black',size=20)
plt.show()
f,ax=plt.subplots(1,1,figsize=(15,8))
sns.countplot(x=df['month'],data=df[df['is_canceled'] == 'Cancelled'].groupby('month')[['adr']].sum().reset_index(),palette = 'flare',saturation=0.95)
ax.set_title('ADR per month when Reservation Cancelled',color='black',size=25)
plt.xlabel(' Month',color='black',size=20)
plt.ylabel('ADR',color='black',size=20)
for container in ax.containers:
ax.bar_label(container,color='black',size=20)
plt.show()
Cancelled_data = df[df['is_canceled'] == 'Cancelled']
f,ax=plt.subplots(1,1,figsize=(15,8))
plt.pie(x = Cancelled_data['country'].value_counts().head(10),labels=Cancelled_data['country'].value_counts().head(10).index,autopct='%1.1f%%',shadow=True,explode=[0.1,0,0,0,0,0,0,0,0,0])
ax.set_title('Top 10 country in terms of Cancellation',color='black',size=25)
plt.show()
df['market_segment'].value_counts()
Online TA 56477 Offline TA/TO 24218 Groups 19811 Direct 12606 Corporate 5295 Complementary 743 Aviation 237 Undefined 2 Name: market_segment, dtype: int64
df['market_segment'].value_counts(normalize=True)
Online TA 0.473050 Offline TA/TO 0.202850 Groups 0.165937 Direct 0.105588 Corporate 0.044351 Complementary 0.006223 Aviation 0.001985 Undefined 0.000017 Name: market_segment, dtype: float64
f,ax=plt.subplots(1,1,figsize=(15,8))
sns.countplot(x=df['market_segment'],data=df,order=df['market_segment'].value_counts().head(10).index,palette = 'bright',saturation=0.95)
ax.set_title('Reservation Channels',color='black',size=25)
plt.xlabel('Channels',color='black',size=20)
plt.ylabel('Number of reservation',color='black',size=20)
for container in ax.containers:
ax.bar_label(container,color='black',size=20)
plt.show()
Not_Cancelled_data = df[df['is_canceled'] == 'Not_Cancelled']
Not_Cancelled_data_adr = Not_Cancelled_data.groupby('reservation_status_date')[['adr']].mean()
Cancelled_data_adr = Cancelled_data.groupby('reservation_status_date')[['adr']].mean()
f,ax=plt.subplots(1,1,figsize=(20,8))
sns.lineplot( x=Not_Cancelled_data_adr.index, y=Not_Cancelled_data_adr['adr'])
sns.lineplot( x=Cancelled_data_adr.index, y=Cancelled_data_adr['adr'])
ax.set_title('Average Daily Rate',color='black',size=25)
plt.xlabel('Reservation date',color='black',size=20)
plt.ylabel('ADR',color='black',size=20)
plt.legend(['Not Cancelled','Cancelled'],fontsize=20)
<matplotlib.legend.Legend at 0x254c7b463d0>